{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#----------------------------------------------------------------------\n",
    "# Purpose:  Condition an Airline dataset by filtering out NAs where the\n",
    "#           departure delay in the input dataset is unknown.\n",
    "#\n",
    "#           Then treat anything longer than minutesOfDelayWeTolerate\n",
    "#           as delayed.\n",
    "#----------------------------------------------------------------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import h2o\n",
    "from h2o.estimators.gbm import H2OGradientBoostingEstimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime: </td>\n",
       "<td>19 minutes 25 seconds 953 milliseconds </td></tr>\n",
       "<tr><td>H2O cluster version: </td>\n",
       "<td>3.5.0.99999</td></tr>\n",
       "<tr><td>H2O cluster name: </td>\n",
       "<td>spIdea</td></tr>\n",
       "<tr><td>H2O cluster total nodes: </td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O cluster total memory: </td>\n",
       "<td>12.44 GB</td></tr>\n",
       "<tr><td>H2O cluster total cores: </td>\n",
       "<td>8</td></tr>\n",
       "<tr><td>H2O cluster allowed cores: </td>\n",
       "<td>8</td></tr>\n",
       "<tr><td>H2O cluster healthy: </td>\n",
       "<td>True</td></tr>\n",
       "<tr><td>H2O Connection ip: </td>\n",
       "<td>127.0.0.1</td></tr>\n",
       "<tr><td>H2O Connection port: </td>\n",
       "<td>54321</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  --------------------------------------\n",
       "H2O cluster uptime:         19 minutes 25 seconds 953 milliseconds\n",
       "H2O cluster version:        3.5.0.99999\n",
       "H2O cluster name:           spIdea\n",
       "H2O cluster total nodes:    1\n",
       "H2O cluster total memory:   12.44 GB\n",
       "H2O cluster total cores:    8\n",
       "H2O cluster allowed cores:  8\n",
       "H2O cluster healthy:        True\n",
       "H2O Connection ip:          127.0.0.1\n",
       "H2O Connection port:        54321\n",
       "--------------------------  --------------------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "h2o.init()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Parse Progress: [##################################################] 100%\n"
     ]
    }
   ],
   "source": [
    "from h2o.utils.shared_utils import _locate # private function. used to find files within h2o git project directory.\n",
    "\n",
    "air = h2o.import_file(_locate(\"smalldata/airlines/allyears2k_headers.zip\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original dataset rows: 43978, columns: 31\n",
      "New dataset rows: 42892, columns: 31\n"
     ]
    }
   ],
   "source": [
    "numRows, numCols = air.dim\n",
    "print(\"Original dataset rows: {0}, columns: {1}\".format(numRows, numCols))\n",
    "\n",
    "x_cols = [\"Month\", \"DayofMonth\", \"DayOfWeek\", \"CRSDepTime\", \"CRSArrTime\", \"UniqueCarrier\", \"CRSElapsedTime\", \"Origin\", \"Dest\", \"Distance\"]\n",
    "y_col = \"SynthDepDelayed\"\n",
    "\n",
    "noDepDelayedNAs = air[air[\"DepDelay\"].isna() == 0]\n",
    "rows, cols = noDepDelayedNAs.dim\n",
    "print(\"New dataset rows: {0}, columns: {1}\".format(rows, cols))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th style=\"text-align: right;\">  Year</th><th style=\"text-align: right;\">  Month</th><th style=\"text-align: right;\">  DayofMonth</th><th style=\"text-align: right;\">  DayOfWeek</th><th style=\"text-align: right;\">  DepTime</th><th style=\"text-align: right;\">  CRSDepTime</th><th style=\"text-align: right;\">  ArrTime</th><th style=\"text-align: right;\">  CRSArrTime</th><th style=\"text-align: right;\">  UniqueCarrier</th><th style=\"text-align: right;\">  FlightNum</th><th style=\"text-align: right;\">  TailNum</th><th style=\"text-align: right;\">  ActualElapsedTime</th><th style=\"text-align: right;\">  CRSElapsedTime</th><th style=\"text-align: right;\">  AirTime</th><th style=\"text-align: right;\">  ArrDelay</th><th style=\"text-align: right;\">  DepDelay</th><th style=\"text-align: right;\">  Origin</th><th style=\"text-align: right;\">  Dest</th><th style=\"text-align: right;\">  Distance</th><th style=\"text-align: right;\">  TaxiIn</th><th style=\"text-align: right;\">  TaxiOut</th><th style=\"text-align: right;\">  Cancelled</th><th style=\"text-align: right;\">  CancellationCode</th><th style=\"text-align: right;\">  Diverted</th><th style=\"text-align: right;\">  CarrierDelay</th><th style=\"text-align: right;\">  WeatherDelay</th><th style=\"text-align: right;\">  NASDelay</th><th style=\"text-align: right;\">  SecurityDelay</th><th style=\"text-align: right;\">  LateAircraftDelay</th><th style=\"text-align: right;\">  IsArrDelayed</th><th style=\"text-align: right;\">  IsDepDelayed</th><th style=\"text-align: right;\">  SynthDepDelayed</th></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          14</td><td style=\"text-align: right;\">          3</td><td style=\"text-align: right;\">      741</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      912</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 91</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        23</td><td style=\"text-align: right;\">        11</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">                1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          15</td><td style=\"text-align: right;\">          4</td><td style=\"text-align: right;\">      729</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      903</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 94</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        14</td><td style=\"text-align: right;\">        -1</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">                0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          17</td><td style=\"text-align: right;\">          6</td><td style=\"text-align: right;\">      741</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      918</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 97</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        29</td><td style=\"text-align: right;\">        11</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">                1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          18</td><td style=\"text-align: right;\">          7</td><td style=\"text-align: right;\">      729</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      847</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 78</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        -2</td><td style=\"text-align: right;\">        -1</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">                0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          19</td><td style=\"text-align: right;\">          1</td><td style=\"text-align: right;\">      749</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      922</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 93</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        33</td><td style=\"text-align: right;\">        19</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">                1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          21</td><td style=\"text-align: right;\">          3</td><td style=\"text-align: right;\">      728</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      848</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 80</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        -1</td><td style=\"text-align: right;\">        -2</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">                0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          22</td><td style=\"text-align: right;\">          4</td><td style=\"text-align: right;\">      728</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      852</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 84</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">         3</td><td style=\"text-align: right;\">        -2</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">                0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          23</td><td style=\"text-align: right;\">          5</td><td style=\"text-align: right;\">      731</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      902</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 91</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        13</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">                1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          24</td><td style=\"text-align: right;\">          6</td><td style=\"text-align: right;\">      744</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      908</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 84</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">        19</td><td style=\"text-align: right;\">        14</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">                1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  1987</td><td style=\"text-align: right;\">     10</td><td style=\"text-align: right;\">          25</td><td style=\"text-align: right;\">          7</td><td style=\"text-align: right;\">      729</td><td style=\"text-align: right;\">         730</td><td style=\"text-align: right;\">      851</td><td style=\"text-align: right;\">         849</td><td style=\"text-align: right;\">              5</td><td style=\"text-align: right;\">       1451</td><td style=\"text-align: right;\">     3499</td><td style=\"text-align: right;\">                 82</td><td style=\"text-align: right;\">              79</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">        -1</td><td style=\"text-align: right;\">     106</td><td style=\"text-align: right;\">   118</td><td style=\"text-align: right;\">       447</td><td style=\"text-align: right;\">     nan</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">                 3</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">           nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">            nan</td><td style=\"text-align: right;\">                nan</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">                0</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "minutesOfDelayWeTolerate = 15\n",
    "noDepDelayedNAs = noDepDelayedNAs.cbind(noDepDelayedNAs[\"DepDelay\"] > minutesOfDelayWeTolerate)\n",
    "noDepDelayedNAs[numCols] = noDepDelayedNAs[numCols-1].asfactor()\n",
    "noDepDelayedNAs.set_name(numCols,y_col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "gbm Model Build Progress: [##################################################] 100%\n",
      "Model Details\n",
      "=============\n",
      "H2OGradientBoostingEstimator :  Gradient Boosting Machine\n",
      "Model Key:  GBM_model_python_1445841486633_37\n",
      "\n",
      "Model Summary:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>number_of_trees</b></td>\n",
       "<td><b>model_size_in_bytes</b></td>\n",
       "<td><b>min_depth</b></td>\n",
       "<td><b>max_depth</b></td>\n",
       "<td><b>mean_depth</b></td>\n",
       "<td><b>min_leaves</b></td>\n",
       "<td><b>max_leaves</b></td>\n",
       "<td><b>mean_leaves</b></td></tr>\n",
       "<tr><td></td>\n",
       "<td>50.0</td>\n",
       "<td>34338.0</td>\n",
       "<td>5.0</td>\n",
       "<td>5.0</td>\n",
       "<td>5.0</td>\n",
       "<td>18.0</td>\n",
       "<td>32.0</td>\n",
       "<td>28.62</td></tr></table></div>"
      ],
      "text/plain": [
       "    number_of_trees    model_size_in_bytes    min_depth    max_depth    mean_depth    min_leaves    max_leaves    mean_leaves\n",
       "--  -----------------  ---------------------  -----------  -----------  ------------  ------------  ------------  -------------\n",
       "    50                 34338                  5            5            5             18            32            28.62"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "ModelMetricsBinomial: gbm\n",
      "** Reported on train data. **\n",
      "\n",
      "MSE: 0.191672191035\n",
      "R^2: 0.232789986813\n",
      "LogLoss: 0.565710073073\n",
      "AUC: 0.785428554449\n",
      "Gini: 0.570857108897\n",
      "\n",
      "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.412557029006:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>NO</b></td>\n",
       "<td><b>YES</b></td>\n",
       "<td><b>Error</b></td>\n",
       "<td><b>Rate</b></td></tr>\n",
       "<tr><td>NO</td>\n",
       "<td>11180.0</td>\n",
       "<td>9707.0</td>\n",
       "<td>0.4647</td>\n",
       "<td> (9707.0/20887.0)</td></tr>\n",
       "<tr><td>YES</td>\n",
       "<td>3402.0</td>\n",
       "<td>18603.0</td>\n",
       "<td>0.1546</td>\n",
       "<td> (3402.0/22005.0)</td></tr>\n",
       "<tr><td>Total</td>\n",
       "<td>14582.0</td>\n",
       "<td>28310.0</td>\n",
       "<td>0.3056</td>\n",
       "<td> (13109.0/42892.0)</td></tr></table></div>"
      ],
      "text/plain": [
       "       NO     YES    Error    Rate\n",
       "-----  -----  -----  -------  -----------------\n",
       "NO     11180  9707   0.4647   (9707.0/20887.0)\n",
       "YES    3402   18603  0.1546   (3402.0/22005.0)\n",
       "Total  14582  28310  0.3056   (13109.0/42892.0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Maximum Metrics: Maximum metrics at their respective thresholds\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b>metric</b></td>\n",
       "<td><b>threshold</b></td>\n",
       "<td><b>value</b></td>\n",
       "<td><b>idx</b></td></tr>\n",
       "<tr><td>max f1</td>\n",
       "<td>0.4</td>\n",
       "<td>0.7</td>\n",
       "<td>259.0</td></tr>\n",
       "<tr><td>max f2</td>\n",
       "<td>0.2</td>\n",
       "<td>0.8</td>\n",
       "<td>347.0</td></tr>\n",
       "<tr><td>max f0point5</td>\n",
       "<td>0.6</td>\n",
       "<td>0.7</td>\n",
       "<td>180.0</td></tr>\n",
       "<tr><td>max accuracy</td>\n",
       "<td>0.5</td>\n",
       "<td>0.7</td>\n",
       "<td>213.0</td></tr>\n",
       "<tr><td>max precision</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>max absolute_MCC</td>\n",
       "<td>0.5</td>\n",
       "<td>0.4</td>\n",
       "<td>213.0</td></tr>\n",
       "<tr><td>max min_per_class_accuracy</td>\n",
       "<td>0.5</td>\n",
       "<td>0.7</td>\n",
       "<td>209.0</td></tr></table></div>"
      ],
      "text/plain": [
       "metric                      threshold    value     idx\n",
       "--------------------------  -----------  --------  -----\n",
       "max f1                      0.412557     0.739461  259\n",
       "max f2                      0.236201     0.8477    347\n",
       "max f0point5                0.559094     0.727325  180\n",
       "max accuracy                0.500453     0.711578  213\n",
       "max precision               0.956629     1         0\n",
       "max absolute_MCC            0.500453     0.422602  213\n",
       "max min_per_class_accuracy  0.508533     0.710825  209"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Scoring History:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>timestamp</b></td>\n",
       "<td><b>duration</b></td>\n",
       "<td><b>number_of_trees</b></td>\n",
       "<td><b>training_MSE</b></td>\n",
       "<td><b>training_logloss</b></td>\n",
       "<td><b>training_AUC</b></td>\n",
       "<td><b>training_classification_error</b></td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:36</td>\n",
       "<td> 0.032 sec</td>\n",
       "<td>1.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>0.4</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:36</td>\n",
       "<td> 0.059 sec</td>\n",
       "<td>2.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>0.4</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:36</td>\n",
       "<td> 0.089 sec</td>\n",
       "<td>3.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>0.4</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:36</td>\n",
       "<td> 0.117 sec</td>\n",
       "<td>4.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>0.4</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:36</td>\n",
       "<td> 0.152 sec</td>\n",
       "<td>5.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>0.4</td></tr>\n",
       "<tr><td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:38</td>\n",
       "<td> 2.267 sec</td>\n",
       "<td>46.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.6</td>\n",
       "<td>0.8</td>\n",
       "<td>0.3</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:38</td>\n",
       "<td> 2.323 sec</td>\n",
       "<td>47.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.6</td>\n",
       "<td>0.8</td>\n",
       "<td>0.3</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:38</td>\n",
       "<td> 2.378 sec</td>\n",
       "<td>48.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.6</td>\n",
       "<td>0.8</td>\n",
       "<td>0.3</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:38</td>\n",
       "<td> 2.435 sec</td>\n",
       "<td>49.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.6</td>\n",
       "<td>0.8</td>\n",
       "<td>0.3</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-10-25 23:57:38</td>\n",
       "<td> 2.493 sec</td>\n",
       "<td>50.0</td>\n",
       "<td>0.2</td>\n",
       "<td>0.6</td>\n",
       "<td>0.8</td>\n",
       "<td>0.3</td></tr></table></div>"
      ],
      "text/plain": [
       "     timestamp            duration    number_of_trees    training_MSE    training_logloss    training_AUC    training_classification_error\n",
       "---  -------------------  ----------  -----------------  --------------  ------------------  --------------  -------------------------------\n",
       "     2015-10-25 23:57:36  0.032 sec   1.0                0.244362718818  0.681856365041      0.692503203228  0.413433740558\n",
       "     2015-10-25 23:57:36  0.059 sec   2.0                0.239916651379  0.672915590687      0.700446640048  0.410845845379\n",
       "     2015-10-25 23:57:36  0.089 sec   3.0                0.235500532513  0.663968419721      0.712157594375  0.391051944419\n",
       "     2015-10-25 23:57:36  0.117 sec   4.0                0.231804609029  0.656396477796      0.717626212056  0.387158444465\n",
       "     2015-10-25 23:57:36  0.152 sec   5.0                0.228800636002  0.650232244088      0.72532125588   0.377040007461\n",
       "---  ---                  ---         ---                ---             ---                 ---             ---\n",
       "     2015-10-25 23:57:38  2.267 sec   46.0               0.192702810811  0.568263251703      0.783306118911  0.305721346638\n",
       "     2015-10-25 23:57:38  2.323 sec   47.0               0.192356155909  0.567421568826      0.783968341044  0.304695514315\n",
       "     2015-10-25 23:57:38  2.378 sec   48.0               0.192132888018  0.566844161055      0.784382761975  0.302247505362\n",
       "     2015-10-25 23:57:38  2.435 sec   49.0               0.191914363235  0.566306525033      0.784868956573  0.306327520284\n",
       "     2015-10-25 23:57:38  2.493 sec   50.0               0.191672191035  0.565710073073      0.785428554449  0.305628089154"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Variable Importances:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b>variable</b></td>\n",
       "<td><b>relative_importance</b></td>\n",
       "<td><b>scaled_importance</b></td>\n",
       "<td><b>percentage</b></td></tr>\n",
       "<tr><td>Origin</td>\n",
       "<td>6877.3</td>\n",
       "<td>1.0</td>\n",
       "<td>0.4</td></tr>\n",
       "<tr><td>Dest</td>\n",
       "<td>4551.0</td>\n",
       "<td>0.7</td>\n",
       "<td>0.3</td></tr>\n",
       "<tr><td>DayofMonth</td>\n",
       "<td>2025.6</td>\n",
       "<td>0.3</td>\n",
       "<td>0.1</td></tr>\n",
       "<tr><td>UniqueCarrier</td>\n",
       "<td>1279.5</td>\n",
       "<td>0.2</td>\n",
       "<td>0.1</td></tr>\n",
       "<tr><td>CRSArrTime</td>\n",
       "<td>724.8</td>\n",
       "<td>0.1</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>CRSDepTime</td>\n",
       "<td>636.9</td>\n",
       "<td>0.1</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>DayOfWeek</td>\n",
       "<td>408.2</td>\n",
       "<td>0.1</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>CRSElapsedTime</td>\n",
       "<td>118.8</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>Month</td>\n",
       "<td>73.3</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>Distance</td>\n",
       "<td>31.1</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr></table></div>"
      ],
      "text/plain": [
       "variable        relative_importance    scaled_importance    percentage\n",
       "--------------  ---------------------  -------------------  ------------\n",
       "Origin          6877.27                1                    0.411159\n",
       "Dest            4551.03                0.66175              0.272085\n",
       "DayofMonth      2025.55                0.294529             0.121098\n",
       "UniqueCarrier   1279.5                 0.186048             0.0764954\n",
       "CRSArrTime      724.814                0.105393             0.0433331\n",
       "CRSDepTime      636.901                0.0926096            0.0380773\n",
       "DayOfWeek       408.238                0.0593605            0.0244066\n",
       "CRSElapsedTime  118.821                0.0172773            0.00710371\n",
       "Month           73.2614                0.0106527            0.00437995\n",
       "Distance        31.1477                0.00452908           0.00186217"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "gbm = H2OGradientBoostingEstimator(distribution=\"bernoulli\")\n",
    "gbm.train(x=x_cols, y=y_col, training_frame = noDepDelayedNAs)\n",
    "gbm.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
